* Start from an empty workspace and turn off output paging.
clear

set more off

******** Data from Nekarda LPD database ********

* NOTE(review): the load below is commented out, and -clear- above empties
* memory, so this file as written does not load any data itself -- confirm
* how the data are meant to be brought in before running.
*use "lpd.dta"

* Keep observations dated January 1994 or later.  Stata treats a missing
* value as larger than any number, so a bare mdate>=... test would also keep
* rows whose date is missing; exclude them explicitly, as is done for age.
keep if mdate>=tm(1994m1) & !missing(mdate)

* Keep rows with a non-missing age of 16 or more.
keep if age>=16 & !missing(age)

* Drop rows whose education code is -1.
drop if educ==-1



* Experience proxy: age minus a start age that depends on the education code
* (educ 1 -> 16, 2 -> 18, 3 -> 21, 4 -> 24).  Rows with any other educ value
* are left missing.
* NOTE(review): what each educ code stands for is not shown in this file --
* confirm against the data documentation.
local start_ages 16 18 21 24
gen exper = .
forvalues level = 1/4 {
	local start_age : word `level' of `start_ages'
	replace exper = age - `start_age' if educ == `level'
}

* Squared experience, used alongside exper in the wage regression below.
gen exper_sq = exper*exper

* 0/1 indicators built from the race, sex and marstat codes.  Each expression
* evaluates to 0 (not missing) when the underlying code is missing.
* NOTE(review): code meanings are inferred from the variable names only.
* Race code 11 counts toward both white and hispanic, and 21 toward both
* black and hispanic -- confirm against the codebook.
gen white = inlist(race, 10, 11)

gen black = inlist(race, 20, 21)

gen hispanic = inlist(race, 11, 21, 31)

gen female = sex == 0

gen married = marstat == 1

gen widowed_divorced = marstat == 2

* Next-record outcome flag, computed within person (pid) in mis order.
* A row is eligible when all of the following hold:
*   - the following row of the same pid has mis exactly one higher,
*   - that following row has match == 1,
*   - the current row is not mis 4,
*   - the current row has a non-missing, non-negative udur.
* Eligible rows get 1 when the following row has lft == 4 and 0 when it has
* lft == 5 or 6; every other row is left missing.
* NOTE(review): the meaning of the lft codes and of match is not shown in this
* file -- confirm against the data documentation.
bysort pid (mis): gen u_to_e_next_period = (lft[_n+1] == 4)   ///
	if inlist(lft[_n+1], 4, 5, 6)                              ///
	 & mis == mis[_n+1] - 1                                    ///
	 & mis != 4                                                ///
	 & match[_n+1] == 1                                        ///
	 & !missing(udur) & udur >= 0                              ///
	 & pid[_n] == pid[_n+1]


*** Occupation groups from the Census occupation code (peio1ocd) ***
* peio1ocd is collapsed into 11 groups (labelled at the end of this section).
* The code cut-offs differ across three date windows, so each window has its
* own set of rules; the windows do not overlap.
* NOTE(review): codes that fall in the gaps between the ranges below are left
* with occ missing -- confirm that no valid codes lie in those gaps.

* --- January 2003 through December 2010 ---
gen occ = 1 if peio1ocd > 0 & peio1ocd <951 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 2 if peio1ocd > 999 & peio1ocd <3550 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 3 if peio1ocd> 3550 & peio1ocd <4670 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 4 if peio1ocd > 4670 & peio1ocd <4970 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 5 if peio1ocd > 4970 & peio1ocd <5940 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 6 if peio1ocd > 5940 & peio1ocd <6150 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 7 if peio1ocd > 6150 & peio1ocd <6950 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 8 if peio1ocd > 6950 & peio1ocd <7630 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 9 if peio1ocd > 7650 & peio1ocd <8970 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 10 if peio1ocd > 8970 & peio1ocd <9800 & mdate>=tm(2003m1) & mdate<=tm(2010m12)
replace occ = 11 if peio1ocd ==9840 & !missing(peio1ocd) & mdate>=tm(2003m1) & mdate<=tm(2010m12)

* --- January 1992 through December 2002 ---
replace occ = 1 if peio1ocd > 0 & peio1ocd <43 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 2 if peio1ocd > 42 & peio1ocd <202 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 4 if peio1ocd > 242 & peio1ocd <303 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 5 if peio1ocd > 302 & peio1ocd <392 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 3 if peio1ocd > 402 & peio1ocd <470 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 6 if peio1ocd > 472 & peio1ocd <500 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 7 if peio1ocd > 552 & peio1ocd <600 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 8 if peio1ocd > 501 & peio1ocd <550 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 9 if peio1ocd > 600 & peio1ocd <802 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
replace occ = 10 if peio1ocd > 802 & peio1ocd <891 & mdate>=tm(1992m1) & mdate<=tm(2002m12)
* The open-ended upper range needs an explicit missing guard: Stata treats a
* missing value as larger than any number, so without it every row with a
* missing occupation code would be coded 11.  The window also runs through
* 2002m12 like every other rule in this section (it previously stopped at
* 2001m12, leaving 2002 rows in this range unassigned).
replace occ = 11 if peio1ocd > 891 & !missing(peio1ocd) & mdate>=tm(1992m1) & mdate<=tm(2002m12)

* --- January 2011 onward ---
replace occ = 1 if peio1ocd > 0 & peio1ocd <951 & mdate>=tm(2011m1)
replace occ = 2 if peio1ocd > 1000 & peio1ocd <3540 & mdate>=tm(2011m1)
replace occ = 4 if peio1ocd > 4699 & peio1ocd <4966 & mdate>=tm(2011m1)
replace occ = 5 if peio1ocd > 4999 & peio1ocd <5941 & mdate>=tm(2011m1)
replace occ = 3 if peio1ocd > 3599 & peio1ocd < 4651 & mdate>=tm(2011m1)
replace occ = 6 if peio1ocd > 6000 & peio1ocd < 6131 & mdate>=tm(2011m1)
replace occ = 7 if peio1ocd > 6199 & peio1ocd < 6941 & mdate>=tm(2011m1)
replace occ = 8 if peio1ocd > 6999 & peio1ocd < 7631 & mdate>=tm(2011m1)
replace occ = 9 if peio1ocd > 7699 & peio1ocd < 8966 & mdate>=tm(2011m1)
replace occ = 10 if peio1ocd > 8966 & peio1ocd <9751 & mdate>=tm(2011m1)
* Same missing-value guard as above for the open-ended top range.
replace occ = 11 if peio1ocd > 9751 & !missing(peio1ocd) & mdate>=tm(2011m1)


* Attach value labels to the 11 occupation groups.

label define occ 1 "Management, business,.." 2 "Professional and Related" 3 "Services" 4 "Sales and related" 5 "office and administrative support" 6 "Farming fishing" 7 "Construction" 8 "Installation" 9 "Production" 10 "Transportation and Material moving" 11 "Armed Forces" 

label values occ occ





*************** Industry groups from the Census industry code (peio1icd) ***************
* peio1icd is collapsed into 9 groups (labelled at the end of this section).
* The code cut-offs depend on the date of the observation, so the rules are
* applied separately for each date window, oldest first.  The windows do not
* overlap.  Within a window, any positive, non-missing code not captured by
* groups 1-8 goes to group 9, so that catch-all rule must come last.

gen ind = .

* --- January 1992 through December 2002 ---
local window "inrange(mdate, tm(1992m1), tm(2002m12))"
replace ind = 1 if inrange(peio1icd, 60, 99)          & `window'
replace ind = 2 if peio1icd > 99  & peio1icd < 400    & `window'
replace ind = 3 if peio1icd > 499 & peio1icd < 700    & `window'
replace ind = 4 if peio1icd > 699 & peio1icd < 720    & `window'
replace ind = 5 if peio1icd > 720 & peio1icd < 800    & `window'
replace ind = 8 if peio1icd > 799 & peio1icd < 811    & `window'
replace ind = 7 if peio1icd > 811 & peio1icd < 841    & `window'
replace ind = 6 if peio1icd > 841 & peio1icd < 894    & `window'
replace ind = 9 if missing(ind) & !missing(peio1icd) & peio1icd > 0 & `window'

* --- January 2003 through December 2007 ---
local window "inrange(mdate, tm(2003m1), tm(2007m12))"
replace ind = 1 if inrange(peio1icd,  770, 1060)      & `window'
replace ind = 2 if inrange(peio1icd, 1070, 4060)      & `window'
replace ind = 3 if inrange(peio1icd, 4070, 6060)      & `window'
replace ind = 4 if inrange(peio1icd, 6870, 7260)      & `window'
replace ind = 5 if inrange(peio1icd, 7270, 7790)      & `window'
replace ind = 6 if inrange(peio1icd, 7860, 7890)      & `window'
replace ind = 7 if inrange(peio1icd, 7970, 8470)      & `window'
replace ind = 8 if inrange(peio1icd, 8560, 8690)      & `window'
replace ind = 9 if missing(ind) & !missing(peio1icd) & peio1icd > 0 & `window'

* --- January 2008 through December 2012, and January 2013 onward ---
* Both windows use the same cut-offs, so the rules are applied once per window.
foreach window in "inrange(mdate, tm(2008m1), tm(2012m12))" "mdate >= tm(2013m1)" {
	replace ind = 1 if peio1icd == 770                    & `window'
	replace ind = 2 if inrange(peio1icd, 1070, 3990)      & `window'
	replace ind = 3 if inrange(peio1icd, 4070, 5790)      & `window'
	replace ind = 4 if inrange(peio1icd, 6870, 7190)      & `window'
	replace ind = 5 if inrange(peio1icd, 7270, 7790)      & `window'
	replace ind = 6 if inrange(peio1icd, 7860, 7890)      & `window'
	replace ind = 7 if inrange(peio1icd, 7970, 8470)      & `window'
	replace ind = 8 if inrange(peio1icd, 8560, 8690)      & `window'
	replace ind = 9 if missing(ind) & !missing(peio1icd) & peio1icd > 0 & `window'
}

* Attach value labels to the 9 industry groups.
label define ind                                  ///
	1 "Construction"                              ///
	2 "Manufacturing"                             ///
	3 "Wholesale & Retail Trade"                  ///
	4 "Financial Activities"                      ///
	5 "Professional and Business Services"        ///
	6 "Educational Services"                      ///
	7 "Healthcare Services"                       ///
	8 "Leisure and Hospitality"                   ///
	9 "All other"
label values ind ind

* Whole-number copy of sswgt; it is used below as a frequency weight, which
* Stata requires to be integer-valued.
gen round_wgt = round(sswgt)


* Three-valued status indicator:
*   1 = lfs == 2 with udur above 26
*   0 = lfs == 2 with udur between 0 and 26 inclusive
*   2 = lfs == 1
* All other rows are left missing.
* NOTE(review): presumably lfs 1/2 are employed/unemployed and udur is
* duration in weeks -- confirm against the data documentation.
gen long_term_unemployed = cond(udur > 26, 1, 0) if lfs == 2 & udur >= 0 & !missing(udur)
replace long_term_unemployed = 2 if lfs == 1


* Calendar year of the monthly date.  Deriving it directly from mdate avoids
* a silent mislabel: a default of 1994 overwritten only for 1995-2013 would
* tag every row dated after December 2013, and every row with a missing
* date, as 1994.  Rows with a missing mdate get a missing year here.
gen year = year(dofm(mdate))

********* CPI by year; source noted as data.bls.gov/pdq/SurveyOutputServlet *********
********* Update as needed: append one value per additional year *********

* One value per calendar year, in order, starting with 1994 and ending with
* 2013.  Rows whose year is outside that span keep a missing cpi.
local cpi_values                                   ///
	148.2 152.4 156.9 160.5 163.0 166.6            ///
	172.2 177.1 179.9 184.0 188.9 195.3            ///
	201.6 207.3 215.3 214.5 218.1 224.9            ///
	229.6 233.0

gen cpi = .
local cpi_year = 1994
foreach level of local cpi_values {
	replace cpi = `level' if year == `cpi_year'
	local ++cpi_year
}

****** Hourly wage for workers with weekly earnings ******
* ernwkly divided by hrusl, computed only when both are present, hrusl is
* positive and ernwkly is non-negative.
* NOTE(review): presumably ernwkly is weekly earnings and hrusl is usual
* weekly hours -- confirm against the data documentation.
gen hrly_fromweekly = ernwkly/hrusl if hrusl > 0 & ernwkly >= 0 & !missing(ernwkly, hrusl)

* Deflate by the year's CPI and rescale by 233.0, which is the value the CPI
* table above assigns to 2013.
local base_cpi 233.0
gen real_hrly_fromweekly = hrly_fromweekly/cpi * `base_cpi'

* Natural log of the real hourly wage; defined only for positive wages.
gen log_real_hourly_fromweekly = ln(real_hrly_fromweekly) if real_hrly_fromweekly > 0



* Log real hourly wage regression, estimated on 2004-2006 only, with rounded
* weights as frequency weights and robust standard errors.
* NOTE(review): hisp relies on Stata's variable-name abbreviation; it resolves
* to hispanic only if no other variable name starts with hisp -- confirm.
regress log_real_hourly_fromweekly                          ///
	exper exper_sq i.educ i.ind i.occ                       ///
	female black white hisp married widowed_divorced        ///
	[fweight = round_wgt]                                   ///
	if inrange(year, 2004, 2006), vce(robust)

* Linear prediction from the fitted model.  No sample restriction is given,
* so it is computed for every row with non-missing regressors, including
* years outside the 2004-2006 estimation window.
predict fitted_earnval, xb



* Weighted mean predicted log wage for each year x long_term_unemployed cell;
* this replaces the data in memory with one row per cell.
collapse (mean) fitted_earnval [fweight = round_wgt], by(year long_term_unemployed)

save wage_regression_BPEA, replace


